# 0. initial setup ----
# clear workspace
rm(list = ls())

# set seed (fixed so that the runif() draws further down are reproducible)
set.seed(11081984)

# set working directory
# Replace the placeholder string with the path of the project folder, i.e. the
# folder that contains the data/ directory read from and written to below.
working_dir <- "YOUR WORKING DIRECTORY"
if (!dir.exists(working_dir)) {
  # fail with an actionable message instead of a parse error or a cryptic
  # "cannot change working directory"
  stop("Set `working_dir` to an existing directory before running this script.",
       call. = FALSE)
}
setwd(working_dir)


## 0.1 define custom functions ----
#' Install (if missing) and attach a set of packages.
#'
#' @param packs Character vector of package names. Defaults to the global
#'   `required_packs`, so the zero-argument call `check_install_load()` keeps
#'   working as before.
#' @return Invisibly, a list holding the value returned by `require()` for
#'   each package (all `TRUE` on success). Stops with an error when at least
#'   one package could not be attached.
check_install_load <- function(packs = required_packs) {
  installed_packs <- rownames(installed.packages())
  packs_to_install <- setdiff(packs, installed_packs)
  if (length(packs_to_install) > 0) {
    install.packages(packs_to_install)
  }
  loaded <- lapply(packs, require, character.only = TRUE)
  # require() only returns FALSE on failure; turn that into a hard error so the
  # script does not continue with missing functions
  failed <- packs[!vapply(loaded, isTRUE, logical(1))]
  if (length(failed) > 0) {
    stop("Could not load package(s): ", paste(failed, collapse = ", "),
         call. = FALSE)
  }
  invisible(loaded)
}


## 0.2 define and load required packages ----
# packages attached for the rest of the script; missing ones are installed first
required_packs <- c("tidyverse", "readxl")
check_install_load()



# 1. data wrangling ----
## 1.1 load data sets ----
# survey data (Excel workbook) and election results (CSV), both read relative
# to the working directory
df_orig <- readxl::read_xlsx("data/survey_data.xlsx")
df_elec <- readr::read_csv("data/election_results.csv")


## 1.2 identify respondents interviewed before March 26 ----
tmp <- df_orig %>%
  dplyr::select(record, End_date) %>%
  mutate(end_date = as.Date(End_date, "%m/%d/%y"),
         # month and day as one integer (e.g. March 25 -> 325) so that the
         # cutoff respects the month, not only the day of the month
         end_mmdd = as.integer(format(end_date, "%m%d")))
# NOTE(review): only month and day are compared; this assumes all interviews
# fall within a single calendar year -- confirm against the fieldwork period.
# which() drops rows with a missing/unparseable date, which would otherwise
# show up as NA ids in analysis_sub and inflate the share printed below.
analysis_sub <- tmp$record[which(tmp$end_mmdd < 326)]
# share of all respondents interviewed before the cutoff
length(analysis_sub) / nrow(df_orig)
rm(tmp)


## 1.3 rename, label, recode and generate variables ----
df <- df_orig %>%
  rename(
    id = record,
    Q5_pol_interest = Q5,
    Q6_campaign = Q6,
    # voter positions (vpi) per issue
    Q11_vpi_taxes = Q11r1,
    Q11_vpi_pensions = Q11r2,
    Q11_vpi_environ = Q11r3,
    Q11_vpi_education = Q11r4,
    Q11_vpi_immigration = Q11r5,
    Q11_vpi_interior = Q11r6,
    # voter issue salience (vsi) per issue
    Q12_vsi_taxes = Q12r1,
    Q12_vsi_pensions = Q12r2,
    Q12_vsi_environ = Q12r3,
    Q12_vsi_education = Q12r4,
    Q12_vsi_immigration = Q12r5,
    Q12_vsi_interior = Q12r6,
    # propensity to vote (ptv) per party
    Q17_ptv_VVD = Q17r1,
    Q17_ptv_PVV = Q17r2,
    Q17_ptv_CDA = Q17r3,
    Q17_ptv_D66 = Q17r4,
    Q17_ptv_GL = Q17r5,
    Q17_ptv_SP = Q17r6,
    Q17_ptv_PvdA = Q17r7,
    Q17_ptv_CU = Q17r8,
    Q17_ptv_PvdD = Q17r9,
    # party identification items
    Q18_pid = Q18,
    Q19_pid = Q19,
    Q20_pid_party = Q20,
    Q21_pid_strength = Q21,
    Q23_party_vote = Q23,
    Q24_VVD = Q24r1,
    Q24_PVV = Q24r2,
    Q24_CDA = Q24r3,
    Q24_D66 = Q24r4,
    Q24_GL = Q24r5,
    Q24_SP = Q24r6,
    Q24_PvdA = Q24r7,
    Q24_CU = Q24r8,
    Q24_PvdD = Q24r9,
    Q24_50plus = Q24r10,
    Q24_SGP = Q24r11,
    Q24_DK = Q24r12,
    voter_party = VOTEDPARTYQ23,
    coalition_party = COALITIONPARTNER,
    Q29_vote_again = Q29
  ) %>%
  # for VVD supporters (voter_party code 9) the PositionPARTYVOTED and
  # PositionCOALPARTNER columns are swapped, so swap them back
  mutate(
    ppi_1 = if_else(voter_party == 9, PositionCOALPARTNER1, PositionPARTYVOTED1),
    ppi_2 = if_else(voter_party == 9, PositionCOALPARTNER2, PositionPARTYVOTED2),
    cppi_1 = if_else(voter_party == 9, PositionPARTYVOTED1, PositionCOALPARTNER1),
    cppi_2 = if_else(voter_party == 9, PositionPARTYVOTED2, PositionCOALPARTNER2)
  ) %>%
  # put party positions on the respondents' scale: values above 2 move up by one
  mutate(
    across(
      c(ppi_1, ppi_2, cppi_1, cppi_2),
      ~ case_when(
        .x < 3 ~ .x,
        .x > 2 ~ .x + 1
      )
    )
  ) %>%
  # recode existing variables and generate new variables
  mutate(
    # code 88 -> NA, then reverse the scale (x -> 5 - x)
    Q5_pol_interest = 5 - na_if(Q5_pol_interest, 88),
    Q6_campaign = 5 - na_if(Q6_campaign, 88),
    # codes 77 and 88 -> NA on all propensity-to-vote items
    across(starts_with("Q17_"), ~ na_if(na_if(.x, 77), 88)),
    Q21_pid_strength = na_if(Q21_pid_strength, 88),
    # reported strength where Q18 == 1 or (Q18 == 2 and Q19 == 1);
    # fixed value 4 where Q18 == 2 and Q19 == 2; NA otherwise
    pid_strength = case_when(
      Q18_pid == 1 ~ Q21_pid_strength,
      Q18_pid == 2 & Q19_pid == 1 ~ Q21_pid_strength,
      Q18_pid == 2 & Q19_pid == 2 ~ 4
    ),
    # code 3 -> NA, then 2 -> 0 and 1 -> 1
    Q29_vote_again = recode(na_if(Q29_vote_again, 3), "2" = 0, "1" = 1),
    # treatment arm = which of the three outcome items was answered
    treatment = case_when(
      !is.na(Q28ar1) ~ 1,
      !is.na(Q28br1) ~ 2,
      !is.na(Q28cr1) ~ 3
    ),
    deal_logroll = treatment < 3,
    # label issues
    issue_1 = fct_recode(
      as.factor(hISSUE1),
      "tax rates" = "1",
      "nitrogen emissions" = "3",
      "acceptance requirement for schools" = "4",
      "refugees" = "5"
    ),
    issue_2 = fct_recode(
      as.factor(hISSUE2),
      "retirement age" = "2",
      "acceptance requirement for schools" = "4",
      "refugees" = "5",
      "prison sentences" = "6"
    ),
    # outcome: first non-missing answer among the three treatment-specific items
    Q28_voter_deal_satis = coalesce(Q28ar1, Q28br1, Q28cr1),
    Q28_voter_deal_dissatis = 10 - Q28_voter_deal_satis,
    # jittered outcome: add uniform noise in (-0.5, 0.5), capped at 10 from above
    voter_deal_satis_jitter = pmin(
      Q28_voter_deal_satis + (runif(nrow(.)) - 0.5),
      10
    )
  ) %>%
  # select subset of variables
  dplyr::select(
    id, Q5_pol_interest, Q6_campaign, Q29_vote_again,
    starts_with(c("Q11_", "Q12_", "Q17_", "cppi_", "ppi_", "issue_")),
    contains("_pid"), pid_strength,
    Q23_party_vote, Q28_voter_deal_satis, Q28_voter_deal_dissatis,
    voter_deal_satis_jitter,
    deal_logroll, treatment, voter_party, coalition_party
  ) %>%
  # voter position (vpi) and salience (vsi) on the two issues shown to the
  # respondent, looked up from the issue-specific items
  mutate(
    vpi_1 = case_when(
      issue_1 == "tax rates" ~ Q11_vpi_taxes,
      issue_1 == "nitrogen emissions" ~ Q11_vpi_environ,
      issue_1 == "acceptance requirement for schools" ~ Q11_vpi_education,
      issue_1 == "refugees" ~ Q11_vpi_immigration
    ),
    vsi_1 = case_when(
      issue_1 == "tax rates" ~ Q12_vsi_taxes,
      issue_1 == "nitrogen emissions" ~ Q12_vsi_environ,
      issue_1 == "acceptance requirement for schools" ~ Q12_vsi_education,
      issue_1 == "refugees" ~ Q12_vsi_immigration
    ),
    vpi_2 = case_when(
      issue_2 == "retirement age" ~ Q11_vpi_pensions,
      issue_2 == "acceptance requirement for schools" ~ Q11_vpi_education,
      issue_2 == "refugees" ~ Q11_vpi_immigration,
      issue_2 == "prison sentences" ~ Q11_vpi_interior
    ),
    vsi_2 = case_when(
      issue_2 == "retirement age" ~ Q12_vsi_pensions,
      issue_2 == "acceptance requirement for schools" ~ Q12_vsi_education,
      issue_2 == "refugees" ~ Q12_vsi_immigration,
      issue_2 == "prison sentences" ~ Q12_vsi_interior
    ),
    # hostility toward the coalition partner: reversed (10 - x) propensity to
    # vote for that party
    hostility = 10 - case_when(
      coalition_party == 9 ~ Q17_ptv_VVD,
      coalition_party == 1 ~ Q17_ptv_PVV,
      coalition_party == 2 ~ Q17_ptv_CDA,
      coalition_party == 3 ~ Q17_ptv_D66,
      coalition_party == 4 ~ Q17_ptv_GL,
      coalition_party == 5 ~ Q17_ptv_SP,
      coalition_party == 6 ~ Q17_ptv_PvdA,
      coalition_party == 7 ~ Q17_ptv_CU,
      coalition_party == 8 ~ Q17_ptv_PvdD
    )
  )


## 1.4 add data on party strength ----
# election results keyed by the respondent's own party; seats become `ps`
df_join_party <- rename(
  dplyr::select(df_elec, -party_name),
  voter_party = party_id,
  ps = seats
)
# the same table keyed by the coalition partner; seats become `cs`
df_join_coalition <- rename(
  dplyr::select(df_elec, -party_name),
  coalition_party = party_id,
  cs = seats
)
# attach both seat counts to the survey data
df <- left_join(df, df_join_party, by = "voter_party")
df <- left_join(df, df_join_coalition, by = "coalition_party")



# 2. prepare data for analysis ----
## 2.1 generate IVs of interest ----
df_analysis <- df %>%
  # keep respondents with an observed outcome
  filter(!is.na(Q28_voter_deal_satis)) %>%
  # numeric id for each (voter party, coalition partner) combination
  group_by(voter_party, coalition_party) %>%
  mutate(party_dyad = cur_group_id()) %>%
  ungroup() %>%
  # deal position per issue: treatment 1 = own party's position on issue 1 and
  # partner's on issue 2, treatment 2 = the reverse, treatment 3 = midpoint
  mutate(dpi_1 = case_when(treatment == 1 ~ ppi_1,
                           treatment == 2 ~ cppi_1,
                           treatment == 3 ~ (ppi_1 + cppi_1)/2),
         dpi_2 = case_when(treatment == 1 ~ cppi_2,
                           treatment == 2 ~ ppi_2,
                           treatment == 3 ~ (ppi_2 + cppi_2)/2),
         # randomization i.a. for respondents with compromise treatment
         # (not used below and not part of the final select)
         rand = runif(nrow(.)),
         # seat shares as party weights
         wvp = ps/(ps+cs),
         wcp = 1 - wvp,
         # relative issue salience
         wis_1 = vsi_1/(vsi_1 + vsi_2),
         # set relative issue emphasis to 0.5 if vsi_1 = 0 and vsi_2 = 0 (0/0).
         # Only that case is filled: a missing salience answer keeps wis_1 = NA,
         # so the respondent is removed by drop_na(voter_pref) below instead of
         # silently receiving equal weights.
         wis_1 = if_else(vsi_1 == 0 & vsi_2 == 0, 0.5, wis_1),
         wis_2 = 1 - wis_1,
         # deal position weighted by seat share (compromise treatment only)
         wdpi_1 = case_when(treatment == 1 ~ ppi_1,
                            treatment == 2 ~ cppi_1,
                            treatment == 3 ~ ppi_1 * wvp + cppi_1 * wcp),
         wdpi_2 = case_when(treatment == 1 ~ cppi_2,
                            treatment == 2 ~ ppi_2,
                            treatment == 3 ~ ppi_2 * wvp + cppi_2 * wcp),
         # issue preferences and policy distance (salience-weighted;
         # voter_pref is the negative of voter_deal_wdis)
         voter_pref = -abs(vpi_1 - wdpi_1) * wis_1 - abs(vpi_2 - wdpi_2) * wis_2,
         voter_deal_wdis = abs(vpi_1 - wdpi_1) * wis_1 + abs(vpi_2 - wdpi_2) * wis_2,
         voter_deal_edis = sqrt((vpi_1 - wdpi_1)^2 * wis_1 + (vpi_2 - wdpi_2)^2 * wis_2)) %>%
  # non-weighted issue preferences and policy distance
  # (deal positions not weighted by seat share)
  mutate(voter_pref_non = -abs(vpi_1 - dpi_1) * wis_1 - abs(vpi_2 - dpi_2) * wis_2,
         voter_deal_dis = abs(vpi_1 - dpi_1) * wis_1 + abs(vpi_2 - dpi_2) * wis_2) %>%
  # simplify all positional measures to agree/neutral/disagree
  # (below 3 -> -1, exactly 3 -> 0, above 3 -> 1), stored as <name>_bin
  mutate(across(c(starts_with("vpi_"),
                  starts_with("ppi_"),
                  starts_with("cppi_")),
                ~ case_when(.x < 3 ~ -1,
                            .x == 3 ~ 0,
                            .x > 3 ~ 1),
                .names = "{.col}_bin"),
         dpi_1_bin = case_when(treatment == 1 ~ ppi_1_bin,
                               treatment == 2 ~ cppi_1_bin,
                               treatment == 3 ~ (ppi_1_bin + cppi_1_bin)/2),
         dpi_2_bin = case_when(treatment == 1 ~ cppi_2_bin,
                               treatment == 2 ~ ppi_2_bin,
                               treatment == 3 ~ (ppi_2_bin + cppi_2_bin)/2),
         # issue preferences and policy distance on the simplified scale
         voter_pref_bin = -abs(vpi_1_bin - dpi_1_bin) * wis_1 - abs(vpi_2_bin - dpi_2_bin) * wis_2,
         voter_deal_dis_bin = abs(vpi_1_bin - dpi_1_bin) * wis_1 + abs(vpi_2_bin - dpi_2_bin) * wis_2) %>%
  # keep non-missing observations
  drop_na(pid_strength,
          hostility,
          party_dyad,
          Q28_voter_deal_satis,
          deal_logroll,
          voter_pref) %>%
  # select subset of variables
  # NOTE(review): no column created in this script starts with "voter_sal", so
  # that prefix currently selects nothing -- confirm whether the salience
  # weights (wis_1, wis_2) were meant to be kept.
  dplyr::select(id,
                voter_party,
                hostility,
                pid_strength,
                party_dyad,
                Q23_party_vote,
                Q5_pol_interest,
                Q6_campaign,
                starts_with(c("vpi_",
                              "ppi_",
                              "cppi_",
                              "wdpi_",
                              "dpi_",
                              "voter_pref",
                              "voter_sal",
                              "issue_",
                              "Q11_vpi_")),
                voter_deal_wdis,
                voter_deal_dis,
                voter_deal_dis_bin,
                voter_deal_edis,
                treatment,
                deal_logroll,
                Q28_voter_deal_satis,
                voter_deal_satis_jitter,
                Q29_vote_again)

# drop every object except the two that are written to disk
rm(list = setdiff(ls(), c("df_analysis", "analysis_sub")))

## 2.2 save analysis data set ----
save(list = c("df_analysis", "analysis_sub"),
     file = "data/analysis_data.Rdata")
